* This do-file constructs the dataset of mentors for grade 10 students. Status: DONE

* Start a fresh log for this run; any log still open from an earlier run is closed silently first.
capture log close
log using "$logfile/cleaning_grade10_mentor_$date.log", replace

* standardization: z-score one variable over all observations currently in memory.
*   argument : i = name of an existing numeric variable
*   effect   : recodes the value 99 in that variable to missing (in place), then
*              creates zm_<name> = (value - mean) / sd, replacing any zm_<name>
*              left over from a previous call.
*   nothing is stored in r(); rclass is kept so the declaration is unchanged.
cap program drop standardization 
program standardization, rclass 
	args i
	* temporary variables avoid clashes with (or over-long names from) mean<name> and sd<name>
	tempvar mu sigma
	* drop the variable this program actually creates; the previous code dropped
	* z_<name>, which is never generated, so a second call stopped with "already defined"
	cap drop zm_`i'
	recode `i' (99=.)
	egen `mu' = mean(`i')
	egen `sigma' = sd(`i')
	g zm_`i' = (`i' - `mu')/`sigma'
end

* ---- load the raw survey export (first row holds the variable names) ----
import delimited "$raw/mentor/20210918121943-SurveyExport-Science.csv", varnames(1) encoding(UTF-8) clear

* ---- identity and contact details ----
rename (whatisyourname whatisyourgender whatisyourmobilephonenumber whatisyouremailaddresswritenaify) ///
       (name_m         mentor_sex       mentor_phone                mentor_email)
* gender code 2 becomes 0 (the variable is later renamed mentor_male)
recode mentor_sex (2=0)
* phone is used as a string merge key further down
tostring mentor_phone, replace
* lower-case the email and collapse/trim blanks
replace mentor_email = trim(itrim(lower(mentor_email)))

* ---- date of birth ----
rename (dateofbirthdaywhatisyourdateofbi dateofbirthmonthwhatisyourdateof dateofbirthyearwhatisyourdateofb) ///
       (mentor_bday                      mentor_bmonth                    mentor_byear)

* ---- districts (dzongkhag) and high school ----
rename (whichdzongkhagareyouoriginallyfr whichdzongkhagareyoucurrentlyliv whichhighschooldidyougraduatefro) ///
       (mentor_dist_origin               mentor_dist_current              mentor_school)
* when "Other (Specify)" was chosen, use the free-text school name instead
replace mentor_school = pleasespecifythenameofhighschool if mentor_school == "Other (Specify)"

* ---- self-reported exam performance by subject ----
rename (mathwhatwereyourperformancesonth dzongkhawhatwereyourperformances englishwhatwereyourperformanceso) ///
       (mentor_emath                     mentor_elang                     mentor_eeng)
rename (physicswhatwereyourperformanceso chemistrywhatwereyourperformance biologywhatwereyourperformanceso) ///
       (mentor_ephys                     mentor_echem                     mentor_ebio)
rename businessmathwhatwereyourperforma mentor_ebmath

* ---- current year at college ----
rename whichyearareyouinatcollege mentor_years


* ---- satisfaction with / recommendation of the current study programme ----
rename (areyousatisfiedwiththeprogramyou howmuchdoyourecommendyourprogram) ///
       (mentor_progsat                   mentor_progrec)

* ---- perception of science & technology ----
rename (scienceandtechnologyaremakingour allthingsconsideredscienceandtec) (b2a b2d)

// need to do standardization for the sample of chosen mentors only!!!
// NOTE(review): as written, the z-score below is computed over every survey row in
// memory at this point, i.e. before duplicate submissions are dropped and before the
// merge with the randomisation roster.
* Index = mean of the items that are not 99; an item coded 99 contributes neither to
* the numerator nor to the denominator. If every item is 99 the denominator is 0 and
* the index is missing.
generate attitude_stem = (b2a*(b2a!=99) + b2d*(b2d!=99)) / ///
                         ((b2a!=99) + (b2d!=99))
standardization attitude_stem
label variable zm_attitude_stem "Mentor: standardized attitude to science and technology (baseline)"
capture drop attitude_stem

* ---- gender bias in STEM ----
* The source marks every one of these six items with the comment "reverse"; no
* reversal is applied to them in this section.
rename (scienceeducationismoreimportantf onaveragemenhavehigherabilityins ifwomenhavestemmsciencetechnolog) ///
       (b4a                              b4b                              b4c)
rename (stemmrelatedjobsareformenthanwom whenjobsarescarcemenshouldhavemo whenamotherworksforpaythechildre) ///
       (b4d                              b5b                              b5c)

// need to do standardization for the sample of chosen mentors only!!!
* same construction as above: average of the items that are not 99
generate genderbias = (b4a*(b4a!=99) + b4b*(b4b!=99) + b4c*(b4c!=99) + b4d*(b4d!=99) + b5b*(b5b!=99) + b5c*(b5c!=99)) / ///
                      ((b4a!=99) + (b4b!=99) + (b4c!=99) + (b4d!=99) + (b5b!=99) + (b5c!=99))
standardization genderbias
label variable zm_genderbias "mentor: gender bias (baseline): higher value means more biased against females in STEM"

* ---- own experience as a mentee ----
rename (haveyoueverhadamentorofyourown ifyouhavebeenamenteewereyousatis) ///
       (mentor_beenmentee              mentor_beenmentee_sat)
* code 2 becomes 0, giving a 0/1 indicator
recode mentor_beenmentee (2=0)

* ---- number of persons mentored so far ----
* v226 is the name the import gave the second of these two questions
* (presumably because its header duplicates an earlier one -- TODO confirm)
rename (approximatelyhowmanypersonsmente v226) (mentor_exp mentor_expstem)

* ---- Big Five personality items, renamed trait by trait ----
* extraversion: reserved, outgoing/sociable, talkative
rename (isreservedhowmuchdoyouagreeordis isoutgoingsociablehowmuchdoyouag istalkativehowmuchdoyouagreeordi) ///
       (p1a                              p1c                              p1i)
* agreeableness: sometimes rude, forgiving, considerate and kind
rename (issometimesrudetoothershowmuchdo hasaforgivingnaturehowmuchdoyoua isconsiderateandkindtoalmostever) ///
       (p1l                              p1m                              p1n)
* conscientiousness: thorough job, tends to be lazy, does things efficiently
rename (doesathoroughjobhowmuchdoyouagre tendstobelazyhowmuchdoyouagreeor doesthingsefficientlyhowmuchdoyo) ///
       (p1d                              p1b                              p1o)
* neuroticism: gets nervous easily, worries a lot, remains calm in tense situations
rename (getsnervouseasilyhowmuchdoyouagr worriesalothowmuchdoyouagreeordi remainscalmintensesituationshowm) ///
       (p1e                              p1g                              p1h)
* openness: original/new ideas, values artistic experiences, active imagination
rename (isoriginalcomesupwithnewideashow valuesartisticaestheticexperienc hasanactiveimaginationhowmuchdoy) ///
       (p1j                              p1k                              p1f)

* Each trait index is the mean of its three items, skipping items coded 99.
* Items p1a, p1l, p1b and p1h enter as (5 - item).
* NOTE(review): (5 - item) mirrors a scale whose end points sum to 5; the response
* scale is not visible in this file -- confirm it is not a 1-5 scale.
generate b5extro  = ((5 - p1a)*(p1a!=99) + p1c*(p1c!=99) + p1i*(p1i!=99)) / ///
                    ((p1a!=99) + (p1c!=99) + (p1i!=99))
generate b5agree  = ((5 - p1l)*(p1l!=99) + p1m*(p1m!=99) + p1n*(p1n!=99)) / ///
                    ((p1l!=99) + (p1m!=99) + (p1n!=99))
generate b5consci = (p1d*(p1d!=99) + (5 - p1b)*(p1b!=99) + p1o*(p1o!=99)) / ///
                    ((p1d!=99) + (p1b!=99) + (p1o!=99))
generate b5neuro  = (p1e*(p1e!=99) + p1g*(p1g!=99) + (5 - p1h)*(p1h!=99)) / ///
                    ((p1e!=99) + (p1g!=99) + (p1h!=99))
generate b5open   = (p1j*(p1j!=99) + p1k*(p1k!=99) + p1f*(p1f!=99)) / ///
                    ((p1j!=99) + (p1k!=99) + (p1f!=99))

// need to do standardization for the sample of chosen mentors only!!!
* z-score every trait index (creates zm_<index>; the raw index is kept)
foreach trait in b5extro b5agree b5consci b5neuro b5open {
	standardization `trait'
}
label variable zm_b5extro  "mentor standardized: big 5 extroversion"
label variable zm_b5agree  "mentor standardized: big 5 agreeness"
label variable zm_b5consci "mentor standardized: big 5 conscientiousness"
label variable zm_b5neuro  "mentor standardized: big 5 neuroticism"
label variable zm_b5open   "mentor standardized: big 5 openness"

* ---- hobbies: how much the mentor likes each activity ----
rename (listentomusichowmuchdoyoulikethe playmusichowmuchdoyoulikethefoll watchsportshowmuchdoyoulikethefo) ///
       (mentor_hobby1                    mentor_hobby2                    mentor_hobby3)
rename (playsportshowmuchdoyoulikethefol watchtvhowmuchdoyoulikethefollow playvideoandonlinegameshowmuchdo) ///
       (mentor_hobby4                    mentor_hobby5                    mentor_hobby6)

* ---- academic stream studied ----
rename whichacademicstreamdidyoustudyin mentor_stream

* ---- build a normalised name key (mentor_name) used for matching ----
* The roster names are normalised with the same sequence of steps later in this
* file, so that both sides of the merge use the same key.
replace name_m = lower(name_m)
foreach junk in "@gemailcom" "@gmailcom" {
	replace name_m = subinstr(name_m, "`junk'", "", .)   // email fragments typed into the name field
}

generate mentor_name = name_m
replace mentor_name = trim(itrim(lower(mentor_name)))
* strip blanks and punctuation (single characters, so the order does not matter)
replace mentor_name = subinstr(mentor_name, " ", "", .)   // blanks
replace mentor_name = subinstr(mentor_name, "'", "", .)   // apostrophe
foreach ch in "," "." "/" "-" "=" "(" ")" {
	* comma, dot, slash, dash, equals sign, opening and closing parenthesis
	replace mentor_name = subinstr(mentor_name, "`ch'", "", .)
}
* strip filler text; this must come after the blanks and dots are gone
foreach junk in "mynameis" "@gmailcom" "@gemailcom" {
	replace mentor_name = subinstr(mentor_name, "`junk'", "", .)
}
format mentor_name %25s

* ---- duplicated submissions: keep the one with the smallest responseid ----
* Rows are ordered by name, phone, responseid; a row is dropped when the row just
* before it has the same name key and the same phone.
sort mentor_name mentor_phone responseid
drop if mentor_phone == mentor_phone[_n-1] & mentor_name == mentor_name[_n-1]

* ---- keep the matching keys, mentor characteristics and constructed scores ----
* NOTE(review): the pattern b5* keeps the Big Five indices but also matches the raw
* items b5b and b5c renamed in the gender-bias section -- confirm this is intended.
keep mentor* responseid zm_* b5*
order mentor_name

* Path is quoted (like the other file paths in this do-file) so that a $temp
* directory containing blanks does not break the command.
save "$temp/temp", replace

* ---- load the randomisation roster and keep one row per assigned mentor id ----
clear
import excel "$randomization/final/randomization_STEM_mainlist_9Oct2021.xlsx", sheet("Sheet1") firstrow
duplicates drop mentorid, force
drop menteeid name phone b_* sex

* keep the roster spelling, then build the name key with the same steps that are
* applied to the survey names earlier in this file
generate mentor_name_original = mentor_name
replace mentor_name = trim(itrim(lower(mentor_name)))
* strip blanks and punctuation (single characters, so the order does not matter)
replace mentor_name = subinstr(mentor_name, " ", "", .)   // blanks
replace mentor_name = subinstr(mentor_name, "'", "", .)   // apostrophe
foreach ch in "," "." "/" "-" "=" "(" ")" {
	* comma, dot, slash, dash, equals sign, opening and closing parenthesis
	replace mentor_name = subinstr(mentor_name, "`ch'", "", .)
}
* strip filler text; this must come after the blanks and dots are gone
foreach junk in "mynameis" "@gmailcom" "@gemailcom" {
	replace mentor_name = subinstr(mentor_name, "`junk'", "", .)
}
format mentor_name %25s

* lower-case the roster email and collapse/trim blanks
replace mentor_email = trim(itrim(lower(mentor_email)))

* the roster phone field is split by position: characters 1-8 and characters 10-17
generate mentor_phone1 = substr(mentor_phone, 1, 8)
generate mentor_phone2 = substr(mentor_phone, 10, 8)
drop mentor_phone
* one number was entered with the country code in front (975 is country code)
replace mentor_phone1 = "17929965" if mentor_phone1 == "97517929"

* first phone becomes the merge key; the suffix r marks the roster email
rename (mentor_phone1 mentor_email) (mentor_phone mentor_emailr)

// Step 1: exact match of roster (master) to survey (using) on phone and name key.
// File paths are quoted so that a $temp directory containing blanks does not
// break the commands.
merge 1:1 mentor_phone mentor_name using "$temp/temp", keepusing(mentor_phone mentor_name* mentor_email responseid)

* matched pairs
preserve
keep if _merge==3
drop _merge
save "$temp/temp1m", replace
restore

* roster rows without an exact match (suffix r = roster)
preserve
keep if _merge==1
drop _merge responseid
rename mentor_name mentor_namer
save "$temp/temp1r", replace
restore

* survey rows without an exact match (suffix s = survey)
preserve
keep if _merge==2
drop _merge mentorid
rename mentor_name mentor_names
rename mentor_email mentor_emails
save "$temp/temp1s", replace
restore

// Step 2: match the leftovers on phone, then score the similarity of the two names.
// File paths are quoted so that a $temp directory containing blanks does not
// break the commands.
use "$temp/temp1r", clear
merge 1:m mentor_phone using "$temp/temp1s", keepusing(mentor_phone* mentor_name* responseid mentor_email*)
matchit mentor_names mentor_namer, g(namescore)

* accepted pairs: same phone and name similarity above 0.5.
* The _merge==3 condition is required: Stata treats a missing value as larger than
* any number, so without it a row present on one side only would be accepted
* whenever its score is missing.
preserve
keep if _merge==3 & namescore>0.5
drop _merge namescore
save "$temp/temp2m", replace
restore

* NOTE(review): pairs with the same phone but namescore<=0.5 are written to none of
* temp2m, temp2s or temp2r and therefore leave the pipeline here, whereas the email
* step that follows sends its rejected pairs back to the unmatched files.
* Confirm this is intended.

* survey rows whose phone is not on the roster
preserve
keep if _merge==2
drop _merge mentorid
rename mentor_phone mentor_phones
keep mentor_names mentor_emails responseid mentor_phones
save "$temp/temp2s", replace
restore

* roster rows whose phone is not in the survey
preserve
keep if _merge==1
drop _merge responseid
rename mentor_phone mentor_phoner
keep mentor_namer mentor_emailr mentor_phoner mentorid
save "$temp/temp2r", replace
restore

// Step 3: match the leftovers on the name key, then score the similarity of the emails.
// File paths are quoted so that a $temp directory containing blanks does not
// break the commands.
use "$temp/temp2r", clear
rename mentor_namer mentor_names
merge 1:m mentor_names using "$temp/temp2s"
* phonescore is computed but not used in any decision below
matchit mentor_phoner mentor_phones, g(phonescore)
matchit mentor_emailr mentor_emails, g(emailscore)

* accepted pairs: same name key and email similarity above 0.5.
* Restricted to _merge==3 so that a one-sided row can never be accepted (a missing
* score compares as larger than 0.5 in Stata). A name-matched pair whose score is
* missing is still accepted, as before.
preserve
keep if _merge==3 & emailscore>0.5
drop _merge *score
save "$temp/temp3m", replace
restore

* roster side still unmatched: no name match, or name match rejected on email.
* The rejection test is <=0.5 (previously <0.5) so that a pair scoring exactly 0.5
* is returned to the unmatched pool instead of disappearing from every file.
preserve
keep if _merge==1 | (_merge==3 & emailscore<=0.5)
drop _merge
rename mentor_names mentor_namer
keep *r mentorid
save "$temp/temp3r", replace
restore

* survey side still unmatched (same rule)
preserve
keep if _merge==2 | (_merge==3 & emailscore<=0.5)
drop _merge
keep *s responseid
save "$temp/temp3s", replace
restore

// Stack the three sets of accepted matches plus the roster rows that are still
// unmatched, so that every assigned mentor has one mentorid / responseid pair.
// File paths are quoted so that a $temp directory containing blanks does not
// break the commands.
clear
append using "$temp/temp1m"
append using "$temp/temp2m"
append using "$temp/temp3m"
append using "$temp/temp3r"
* report how many distinct mentor ids there are (output is for inspection only)
unique mentorid

keep mentorid responseid

* manual link to an existing survey response
replace responseid = 419 if mentorid=="MSJNE11" // Sonam Tashi, matched date of birth information in the application package

* Placeholder ids for mentors without a survey response; their details are filled
* in by hand after the merge. The ids only need to be distinct and absent from the
* survey file.
replace responseid = 991 if mentorid=="MSCST21"
replace responseid = 992 if mentorid=="MSCST06"
replace responseid = 993 if mentorid=="MSGCI13"
replace responseid = 994 if mentorid=="MSGCI26"
replace responseid = 996 if mentorid=="MSGCI29"
replace responseid = 997 if mentorid=="MSJNE12"

* attach the survey variables; roster rows without a survey response are kept
merge 1:1 responseid using "$temp/temp", keep(matched master) nogen

* manually check information of seven mentors that did not appear
* in the online survey (these are available from the application package)
*
* Each stanza below fills name, sex, email, date of birth, high school and phone
* for one mentorid. Statement order matters: a later stanza that targets the same
* mentorid overwrites the values written by an earlier one.
*
* NOTE(review): mentor_name is written here with capitals and a blank, whereas the
* survey rows carry the normalised key (lower case, no blanks) -- confirm that the
* mixed format is acceptable downstream.
*
* NOTE(review): two stanzas use mentorid MSGCI13 ("Sonam Tashi", marked 255, and
* "Sapna Jogi", application #128). The Sapna Jogi stanza comes last and overwrites
* all eight fields, so the Sonam Tashi stanza has no effect on the saved data.
* Elsewhere in this file Sonam Tashi is linked to mentorid MSJNE11. Confirm which
* mentorid each stanza should target.

// 142 
	replace mentor_name = "Nabindra Sharma" if mentorid=="MSCST21"
	replace mentor_sex = 1 if mentorid=="MSCST21"
	replace mentor_email = "nabin15sharma@gmail.com" if mentorid=="MSCST21"
	replace mentor_bday = 4 if mentorid=="MSCST21"
	replace mentor_bmonth = "February" if mentorid=="MSCST21"
	replace mentor_byear = 1994 if mentorid=="MSCST21"
	replace mentor_school = "Tendruk Higher Secondary School" if mentorid=="MSCST21"
	replace mentor_phone = "17911285" if mentorid=="MSCST21"
	*replace mentor_dist_current = "Samtse" if mentorid=="MSCST21"


// 303
	replace mentor_name = "Sangay Rinchen" if mentorid=="MSCST06"
	replace mentor_sex = 1 if mentorid=="MSCST06"
	replace mentor_email = "sangayrinchennamgyal98@gmail.com" if mentorid=="MSCST06"
	replace mentor_bday = 23 if mentorid=="MSCST06"
	replace mentor_bmonth = "June"  if mentorid=="MSCST06"
	replace mentor_byear = 1998 if mentorid=="MSCST06"
	replace mentor_school = "Tenzin Higher Secondary School" if mentorid=="MSCST06"
	replace mentor_phone = "17413396" if mentorid=="MSCST06"
	*replace mentor_dist_current = "" if mentorid=="MSCST06"

// 255 
// NOTE(review): every field set in this stanza is overwritten by the later
// MSGCI13 stanza (application #128) -- see the note at the top of this section.
	replace mentor_name = "Sonam Tashi" if mentorid=="MSGCI13"
	replace mentor_sex = 1  if mentorid=="MSGCI13"
	replace mentor_email = "05190038.jnec@rub.edu.bt"  if mentorid=="MSGCI13"
	replace mentor_bday = 15  if mentorid=="MSGCI13"
	replace mentor_bmonth = "July"  if mentorid=="MSGCI13"
	replace mentor_byear = 2000  if mentorid=="MSGCI13"
	replace mentor_school = "Jigmesherabling Central School"  if mentorid=="MSGCI13"
	replace mentor_phone = "17965285"  if mentorid=="MSGCI13"
	*replace mentor_dist_current = "Samdrup Jongkhar"  if mentorid=="MSGCI13"

// application #182: checked with Cheku 
	replace mentor_name = "Sonam Choki" if mentorid=="MSGCI26" 
	replace mentor_sex = 0 if mentorid=="MSGCI26" 
	replace mentor_email = "sonamcho505@gmail.com"  if mentorid=="MSGCI26" 
	replace mentor_bday = 27  if mentorid=="MSGCI26" 
	replace mentor_bmonth = "April"  if mentorid=="MSGCI26" 
	replace mentor_byear = 2001  if mentorid=="MSGCI26" 
	replace mentor_school = "Nangkor Central School"  if mentorid=="MSGCI26" 
	replace mentor_phone = "17983097"  if mentorid=="MSGCI26" 
	*replace mentor_dist_current = "Samdrup Jongkhar"  if mentorid=="MSGCI26" 

// application #187: checked with Cheku
	replace mentor_name = "Tshering Jurmey" if mentorid=="MSGCI29" 
	replace mentor_sex = 1 if mentorid=="MSGCI29" 
	replace mentor_email = "tsheringjurmey74@gmail.com" if mentorid=="MSGCI29" 
	replace mentor_bday = 10 if mentorid=="MSGCI29" 
	replace mentor_bmonth = "August" if mentorid=="MSGCI29" 
	replace mentor_byear = 1999 if mentorid=="MSGCI29" 
	replace mentor_school = "Jampeling Higher Secondary School" if mentorid=="MSGCI29" 
	replace mentor_phone = "17532757" if mentorid=="MSGCI29" 
	*replace mentor_dist_current = "Trashigang" if mentorid=="MSGCI29" 

// application #122: checked with Cheku 
	replace mentor_name = "Yeshi Choden" if mentorid=="MSJNE12" 
	replace mentor_sex = 0 if mentorid=="MSJNE12" 
	replace mentor_email = "05180017.jnec@rub.edu.bt" if mentorid=="MSJNE12" 
	replace mentor_bday = 20 if mentorid=="MSJNE12" 
	replace mentor_bmonth = "May" if mentorid=="MSJNE12" 
	replace mentor_byear = 1998 if mentorid=="MSJNE12" 
	replace mentor_school = "Nangkor Central School" if mentorid=="MSJNE12" 
	replace mentor_phone = "17738285" if mentorid=="MSJNE12" 
	*replace mentor_dist_current = "" if mentorid=="MSJNE12" 

// application #128: checked with Cheku 
// NOTE(review): same mentorid as the stanza marked 255; these values are the ones
// that end up in the saved data for MSGCI13.
	replace mentor_name = "Sapna Jogi" if mentorid=="MSGCI13"
	replace mentor_sex = 0 if mentorid=="MSGCI13"
	replace mentor_email = "sapnajog@gmail.com" if mentorid=="MSGCI13"
	replace mentor_bday = 11 if mentorid=="MSGCI13"
	replace mentor_bmonth = "August" if mentorid=="MSGCI13"
	replace mentor_byear = 2001 if mentorid=="MSGCI13"
	replace mentor_school = "Losel Gatsho Academy" if mentorid=="MSGCI13"
	replace mentor_phone = "17382063" if mentorid=="MSGCI13"
	*replace mentor_dist_current = "" if mentorid=="MSGCI13"
	
* ---- derived variables ----
* age is computed relative to 2021 from the reported year of birth
gen mentor_age = 2021 - mentor_byear

* ---- value labels ----
lab def sat 1 "not at all" 2 "slighly" 3 "moderately" 4 "very" 5 "extremely" 99 "decline to answer"
lab val mentor_beenmentee_sat sat

lab def experience 1 "none" 2 "1-2 persons" 3 "3-5 persons" 4 "6-10 persons" 5 "more than 10" 99 "don't know"
lab val mentor_exp experience
lab val mentor_expstem experience

* NOTE(review): the performance label is attached to math, physics, chemistry and
* biology only; mentor_elang, mentor_eeng and mentor_ebmath stay unlabelled --
* confirm this is intended.
lab def performance 1 "very bad" 2 "bad (21-40%)" 3 "moderate (41-60%)" 4 "good (61-80%)" 5 "very good (81-100%)" 99 "NA"
lab val mentor_emath performance 
lab val mentor_ephys performance
lab val mentor_echem performance
lab val mentor_ebio performance

lab def recommendation 1 "strongly not" 2 "not" 3 "neutral" 4 "recommend" 5 "strongly recommend"
lab val mentor_progrec recommendation

* ---- variable labels ----
* mentor_sex was recoded so that 2 became 0; it is renamed to say what 1 stands for
ren mentor_sex mentor_male
lab var mentor_age "Age" 
lab var mentor_male "Male" 
lab var b5extro "Big 5 Extroversion" 
lab var b5agree "Big 5 Agreeness" 
lab var b5consci "Big 5 Conscientiousness"
lab var b5neuro "Big 5 Neuroticism"
lab var b5open "Big 5 Openness"

* 99 in year-at-college becomes missing
recode mentor_years (99=.)

* responseid was only needed for linking; save the cleaned mentor file
drop responseid 
save "$clean/STEM_mentor.dta", replace

* close the log opened at the top of this do-file so it is not left open after the
* run (capture keeps this harmless if no log is open)
cap log close



 



